Our experiment design focused on obtaining sufficient power to detect
our main outcomes (i.e., identifying manipulative tactics), while also
accommodating several heterogeneous treatment effect (HTE) analyses on
priority subgroups.
#' Simulate one factorial experiment and fit two robust regressions.
#'
#' Randomizes N units across K factors via complete_ra(), simulates outcomes
#' from the supplied data-generating process, and returns:
#'   [[1]] "long" model: outcome on every design-matrix column except the
#'         intercept (main effects + interactions per `fmla`);
#'   [[2]] "short" model: outcome on only the columns selected by `hte_cols`.
#'
#' @param N overall sample size
#' @param p_list list with one entry per factor; each entry is the vector of
#'   treatment-assignment probabilities for that factor's arms (entries are
#'   normalized to sum to 1 by complete_ra())
#' @param b0 baseline mean outcome (i.e., mean for the control)
#' @param b_main_list named list of main effects above baseline, one entry
#'   per non-reference factor level
#' @param b_2wy_list named list of 2-way interaction effects
#' @param sigma SD used to construct the residual error
#' @param fmla formula for the design matrix; defaults to all main effects
#'   and 2-way interactions ("~.^2")
#' @param hte_cols design-matrix column indices kept in the short model;
#'   defaults to the global `hte` for backward compatibility with the
#'   original script, which read that global directly
#' @return list of two lm_robust fits (long, short)
lm_interacted_model <- function(N,
                                p_list,
                                b0,
                                b_main_list,
                                b_2wy_list,
                                sigma,
                                fmla = NULL,
                                hte_cols = hte) {
  K <- length(p_list) # number of factors
  degree <- 2         # max order of interaction in the default model
  # Count the number of levels in each factor
  L_list <- list()
  for (k in seq_along(p_list)) {
    L_list[[paste0("L", k)]] <- length(p_list[[k]]) # naming the levels
  }
  # Complete randomization of treatment assignment, one factor per column
  wmat <- as.data.frame(lapply(seq_len(K), function(k)
    factor(
      complete_ra(N,
                  num_arms = L_list[[k]],
                  prob_each = p_list[[k]]),
      levels = paste0("T", 1:L_list[[k]]),
      labels = 1:L_list[[k]]
    )))
  # Add names for the treatment assignments ("w1.", "w2.", ...)
  colnames(wmat) <- paste0("w", 1:K, ".")
  # Simulation based on the provided data-generating process.
  # BUG FIX: paste0() returns a character string, but model.matrix() requires
  # a formula object, so coerce with as.formula().
  if (is.null(fmla)) fmla <- as.formula(paste0("~.^", degree))
  mmat <- model.matrix(fmla, as.data.frame(wmat)) # indicator columns for w1, w2, ... and their interactions
  # NOTE(review): betas are assumed to line up, in order, with the columns of
  # mmat (intercept, main effects, 2-way interactions) — confirm whenever
  # `fmla` or the coefficient lists change.
  betas <- unlist(c(b0, b_main_list, b_2wy_list))
  y <- as.vector(mmat %*% betas + rnorm(N, sd = sigma)) # simulated outcomes
  # Long model: all indicators except the intercept column
  lm_long <- lm_robust(y ~ ., data = as.data.frame(cbind(y, mmat[, -1])))
  # Short model: only the columns selected by hte_cols
  lm_short <- lm_robust(y ~ ., data = as.data.frame(cbind(y, mmat[, hte_cols])))
  return(list(lm_long, lm_short))
}
#### POWER CALCULATION
set.seed(103851) # reproducibility
# Simulation grid: candidate total sample sizes
possible.ns <- seq(100, 2000, by = 100)
# One slot per candidate N for each definition of power
power.interaction <- rep(NA, length(possible.ns))  # detect the specified interaction
power.onemarginal <- rep(NA, length(possible.ns))  # detect at least one marginal effect
power.allmarginals <- rep(NA, length(possible.ns)) # detect all marginals, Bonferroni
power.all_FDR <- rep(NA, length(possible.ns))      # detect all marginals, FDR control
alpha <- 0.05 # significance level
sims <- 100   # simulated experiments per sample size
# Estimates taken from the pilot data
b0 <- 1.525574    # mean of control # 2.542623
sigma <- 2.520098 # SD of control # 3.253432
## YOU NEED TO EDIT THE FOLLOWING PARAMETERS:
p_list <- list(c(1/2, 1/2),
               c(168/291, 123/291),
               c(119/291, 172/291),
               c(190/291, 101/291))
# Number of HTE terms: one per non-reference level of each moderator factor
no_hte <- sum(lengths(p_list[-1]) - 1)
b_main_list <- list('w1.2' = 0.153) # Tactics Course
# what is a meaningful interaction to test?
b_2wy_list <- list('w1.1:w2.2' = 1.158, # Income
                   'w1.2:w2.2' = 0.153,
                   'w1.1:w3.2' = 1.09, # Reflect
                   'w1.2:w3.2' = 0.153,
                   'w1.1:w4.2' = 0.68, # Ideology
                   'w1.2:w4.2' = 0.153)
fmla <- formula('~w1. + w1.:w2. + w1.:w3. + w1.:w4. ') # formula used to build mmat
n <- length(b_main_list) + length(b_2wy_list) # total number of hypotheses
m <- 3 # index of the interaction to test (here, Tactics Course & quiz)
hte <- seq(4, by = 2, length.out = no_hte) # design-matrix columns of the HTE terms
# Hypothesis type per effect: "two.tailed", "greater", or "lower"
hypotheses <- rep("two.tailed", n)
## END OF PARAMETERS
#### Outer loop: vary the experiment size N over the candidate grid
for (j in 1:length(possible.ns)) {
N <- possible.ns[j]
# Count the number of levels in each factor (recomputed each iteration even
# though it does not depend on N; mirrors the computation inside
# lm_interacted_model)
L_list <- list()
for(k in 1:length(p_list)){
L_list[[paste0('L', k)]] <- length(p_list[[k]]) # naming the levels
}
number_marginal = n #(sum(unlist(L_list))-length(L_list)) # number of marginal effects (unused below; kept from an earlier draft)
# Per-simulation results: rows = simulations, columns = hypotheses.
# pvec*/cvec* hold p-values and sign-consistency flags; the "_s" versions
# come from the short (no-interaction) model.
pvec <- cvec <- matrix(NA, sims, n)
pvec_s <- cvec_s <- matrix(NA, sims, no_hte)
#### Inner loop: conduct "sims" simulated experiments for this N ####
for (i in 1:sims) {
# Simulate one experiment and fit both models (function defined above)
fits <- lm_interacted_model(N,
p_list = p_list, # randomization probs
b0 = b0, # intercept
b_main_list = b_main_list, # main effects
b_2wy_list = b_2wy_list, # 2-way interactions
sigma = sigma,
fmla = fmla)
fit0 <- fits[[1]] # long model, with interactions
fit1 <- fits[[2]] # short model, design columns selected by the global `hte`
### Capture coefficients and p-values according to each hypothesis type
for(h in 1:n){
if(hypotheses[h] == 'two.tailed'){
pvec[i,h] <- summary(fit0)$coefficients[h + 1, 4] # p-value of the h-th term (+1 skips the intercept); column 4 = two-sided p-value
cvec[i,h] <- TRUE # two-tailed: any coefficient sign counts as consistent
} else if (hypotheses[h] == 'greater'){
pvec[i,h] <- pt(coef(summary(fit0))[h + 1, 3], fit0$df[h + 1], # column 3 = t-statistic; upper-tail one-sided p-value
lower.tail = FALSE
)
cvec[i,h] <- summary(fit0)$coefficients[h + 1, 1]>0 # "greater": coefficient must be positive
} else if (hypotheses[h] == 'lower'){
pvec[i,h] <- pt(coef(summary(fit0))[h + 1, 3], fit0$df[h + 1],
lower.tail = TRUE)
cvec[i,h] <- summary(fit0)$coefficients[h + 1, 1]<0 # "lower": coefficient must be negative
}
}
# Same extraction from the short model (no interactions).
# NOTE(review): this indexes hypotheses[s] for s = 1..length(hte), i.e. it
# reuses the FIRST few long-model hypothesis types for the short-model
# terms — harmless while every entry is "two.tailed", but confirm if the
# hypothesis types ever differ across effects.
for(s in 1:length(hte)){
if(hypotheses[s] == 'two.tailed'){
pvec_s[i,s] <- summary(fit1)$coefficients[s + 1, 4]
cvec_s[i,s] <- TRUE
} else if (hypotheses[s] == 'greater'){
pvec_s[i,s] <- pt(coef(summary(fit1))[s + 1, 3], fit1$df[s + 1],
lower.tail = FALSE
)
cvec_s[i,s] <- summary(fit1)$coefficients[s + 1, 1]>0
} else if (hypotheses[s] == 'lower'){
pvec_s[i,s] <- pt(coef(summary(fit1))[s + 1, 3], fit1$df[s + 1],
lower.tail = TRUE)
cvec_s[i,s] <- summary(fit1)$coefficients[s + 1, 1]<0
}
}
}
# Power for the chosen interaction (column index m of the long model):
# share of simulations where the term is significant AND sign-consistent
power.interaction[j] <- mean(sapply(1:sims, function(x)
cvec[x, m]*(pvec[x, m]<alpha)
))
# Power to detect at least one marginal (HTE) effect at the Bonferroni
# threshold alpha/no_hte
power.onemarginal[j] <- mean(sapply(1:sims, function(x)
max(cvec_s[x,]*( pvec_s[x,]<alpha/no_hte ))==1)) # same under Bonferroni or FDR (see note below)
# Power to detect ALL marginal effects simultaneously, Bonferroni threshold
power.allmarginals[j] <- mean(sapply(1:sims, function(x)
all(cvec_s[x,]*( pvec_s[x,]<(alpha/no_hte ))) )) # Bonferroni
# "At least one" power coincides under Bonferroni and FDR, but "all" power
# does not.
# NOTE(review): requiring every p-value < alpha corresponds to the
# Benjamini-Hochberg cutoff applied only to the LARGEST p-value — confirm
# this matches the intended FDR procedure.
power.all_FDR[j] <- mean(sapply(1:sims, function(x)
all(cvec_s[x,]*(pvec_s[x,]<alpha)) )) # FDR - cutoff for the max pvalue is alpha
}
# Save simulated power data for plotting.
# BUG FIX: check.names = FALSE preserves the spaces in the backticked column
# names; data.frame() would otherwise silently mangle them to "At.least.one",
# "All.with.Bonferroni", etc., and those mangled names would appear in the
# plot legend after melting.
gg_df2 <- data.frame(
N = possible.ns,
Interaction = power.interaction,
`At least one` = power.onemarginal,
`All with Bonferroni` = power.allmarginals,
`All with FDR` = power.all_FDR,
check.names = FALSE
)
# Reshape to long format for plotting: one row per (N, power type)
gg_df2 <- melt(
  gg_df2,
  id.vars = "N",
  value.name = "Power",  # the y-axis
  variable.name = "Type" # legend
)
# Plot power against sample size, one curve per definition of power.
# For each series, a dashed blue vertical line marks the first N where power
# exceeds 0.8 and a dashed horizontal line marks the 0.8 level up to that N.
# NOTE(review): if a series never exceeds 0.8 on this grid, min(which(...))
# is Inf and possible.ns[Inf] is NA, so its guide lines silently disappear —
# confirm every series crosses 0.8, or guard these lookups.
ggplot(data = gg_df2, aes(x = N, y = Power, group = Type, col = Type)) + # power against size
theme_minimal() +
geom_point() +
# vertical guide: first N where INTERACTION power exceeds 0.8
geom_segment(aes(
x = possible.ns[min(which(power.interaction>0.8))],
y = 0,
xend = possible.ns[min(which(power.interaction>0.8))],
yend = 0.8
),
data = gg_df2, colour = "blue", lty = "dashed"
) +
# horizontal guide at power = 0.8 up to that N (interaction)
geom_segment(aes(
x = min(possible.ns),
y = 0.8,
xend = possible.ns[min(which(power.interaction>0.8))],
yend = 0.8
),
data = gg_df2, colour = "blue", lty = "dashed"
) +
# vertical guide: first N where ALL-MARGINALS (Bonferroni) power exceeds 0.8
geom_segment(aes(
x = possible.ns[min(which(power.allmarginals>0.8))],
y = 0,
xend = possible.ns[min(which(power.allmarginals>0.8))],
yend = 0.8
),
data = gg_df2, colour = "blue", lty = "dashed"
) +
# horizontal guide at power = 0.8 up to that N (all marginals, Bonferroni)
geom_segment(aes(
x = min(possible.ns),
y = 0.8,
xend = possible.ns[min(which(power.allmarginals>0.8))],
yend = 0.8
),
data = gg_df2, colour = "blue", lty = "dashed"
) +
# vertical guide: first N where ALL-MARGINALS (FDR) power exceeds 0.8
geom_segment(aes(
x = possible.ns[min(which(power.all_FDR>0.8))],
y = 0,
xend = possible.ns[min(which(power.all_FDR>0.8))],
yend = 0.8
),
data = gg_df2, colour = "blue", lty = "dashed"
) +
# horizontal guide at power = 0.8 up to that N (all marginals, FDR)
geom_segment(aes(
x = min(possible.ns),
y = 0.8,
xend = possible.ns[min(which(power.all_FDR>0.8))],
yend = 0.8
),
data = gg_df2, colour = "blue", lty = "dashed"
) +
# vertical guide: first N where AT-LEAST-ONE-MARGINAL power exceeds 0.8
geom_segment(aes(
x = possible.ns[min(which(power.onemarginal>0.8))],
y = 0,
xend = possible.ns[min(which(power.onemarginal>0.8))],
yend = 0.8
),
data = gg_df2, colour = "blue", lty = "dashed"
) +
# horizontal guide at power = 0.8 up to that N (at least one marginal)
geom_segment(aes(
x = min(possible.ns),
y = 0.8,
xend = possible.ns[min(which(power.onemarginal>0.8))],
yend = 0.8
),
data = gg_df2, colour = "blue", lty = "dashed"
) +
scale_y_continuous(breaks = seq(0.2, 1, .2)) + # y-axis tick marks
ggtitle('Power analysis without pre-post test design for main outcome & 3 HTE')
We performed a simulation using the pilot experiment design, testing
for a statistically significant change in the confidence weighted
accuracy score and 3 HTEs (4 outcomes in total) using only 1 treatment
arm (Tactics). As shown in the figure above, a power analysis for the
outcomes of interest suggests that 1,500 participants would give us less
than 20% power for each hypothesis. (Since we are interested in being
able to give precise recommendations, the “at least one” approach — the
only one that is adequately powered — is not appropriate for this
context.) This prompted us to modify our experiment to include a
pre-post test design.